package com.shen.util;


import com.shen.pojo.Content;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.springframework.stereotype.Component;

import java.io.IOException;
import java.lang.annotation.Documented;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;

/**
 * Fetches a JD search-result page and extracts the listed goods from its HTML.
 */
@Component
public class HtmlParseUtil {

    /** Base search URL; the URL-encoded keyword is appended to it. */
    private static final String SEARCH_URL = "https://search.jd.com/Search?keyword=";

    /** Timeout, in milliseconds, passed to jsoup when fetching the page. */
    private static final int TIMEOUT_MILLIS = 30000;

    /** Id of the element that wraps the goods list on the result page. */
    private static final String GOODS_LIST_ID = "J_goodsList";

    /**
     * Manual smoke test: runs a search and prints every parsed item.
     *
     * @param args unused
     * @throws IOException if the search page cannot be fetched
     */
    public static void main(String[] args) throws IOException {
        new HtmlParseUtil().parseJD("心理学").forEach(System.out::println);
    }

    /**
     * Searches JD for the given keyword and converts each {@code <li>} found under the
     * {@code J_goodsList} element into a {@link Content}.
     *
     * <p>Requires network access.
     *
     * @param keyword the search term; must not be {@code null}
     * @return the parsed items, in document order; an empty list when the page does not
     *         contain the goods-list element
     * @throws IOException if the page cannot be fetched
     * @throws NullPointerException if {@code keyword} is {@code null}
     */
    public List<Content> parseJD(String keyword) throws IOException {
        Objects.requireNonNull(keyword, "keyword");

        // Encode the keyword so non-ASCII text, spaces and reserved characters such as
        // '&' or '#' cannot corrupt the query string. The String-charset overload is used
        // to stay compatible with Java 8; its UnsupportedEncodingException is an IOException.
        String url = SEARCH_URL + URLEncoder.encode(keyword, StandardCharsets.UTF_8.name());
        Document document = Jsoup.parse(new URL(url), TIMEOUT_MILLIS);

        List<Content> goodsList = new ArrayList<>();

        // getElementById returns null when the id is absent; return no results
        // rather than failing with a NullPointerException.
        Element list = document.getElementById(GOODS_LIST_ID);
        if (list == null) {
            return goodsList;
        }

        Elements items = list.getElementsByTag("li");
        for (Element item : items) {
            // Populate one Content per list item.
            Content content = new Content();
            // The image URL is read from the "data-lazy-img" attribute of the first <img>.
            content.setImg(item.getElementsByTag("img").eq(0).attr("data-lazy-img"));
            content.setTitle(item.getElementsByClass("p-name").eq(0).text());
            content.setPrice(item.getElementsByClass("p-price").eq(0).text());
            goodsList.add(content);
        }
        return goodsList;
    }
}
